#!/usr/bin/perl

###############################################################################
# 
# LICENSE
#
#  Copyright (C) 2008, 2009, 2013, 2014 J.D. Smith
#
#  This file is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published
#  by the Free Software Foundation; either version 2, or (at your
#  option) any later version.
#
#  This File is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this file; see the file COPYING.  If not, write to the
#  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
#  Boston, MA 02110-1301, USA.
#
###############################################################################

################################################################################

=head1 NAME

timedog - script to display the files backed up by time machine

=head1 SYNOPSIS

  timedog -?
  timedog -t
  timedog [-vlsHn] [-d depth] [-S sort] [-m limit] [-D backupdir] [timestamp]

=head1 DESCRIPTION

Display the files which time machine backed up in its most recent
backup (or any of your choosing).  Uses the fact that Time Machine
creates hard links for unchanged files and directories.  Reports old
and new sizes of the changed files, along with the total file count
and backup size.

The script will locate the Time Machine directory using the system
utility tmutil. To run the script, you must ensure the Time Machine
volume is mounted. The Time Machine backup can normally be found in a
directory called e.g., /Volumes/TM/Backups.backupdb/hostname.

Written by J.D. Smith (jdtsmith A@T gmail _d0t_ com), adaptations by
Hayo Baan (info A@T hayobaan _d0t_ nl).

=head2 Options

=over 8

=item B<-?> or B<-h> or B<--help>

Show full help.

=item B<-t>

List all available backup timestamps

=item B<-v>

Verbose (display depth and minimum size used, if any)

=item B<-d <depth>>

By default, all files are printed, which can get lengthy. With this
option, all changed files/directories deeper than I<depth> will be
summarized to the parent directory at level I<depth>.

=item B<-l>

Omit symbolic links from the summary.  For whatever reason, Time
Machine creates a new version of a symbolic link each and every time
it backs up.

=item B<-s>

Sort the output by current size.

=item B<-S <0..2>>

Sort by: 0=old size, 1=current size, 2=name (default)

=item B<-m <limit>>

Omit files/directories whose current size is less than limit.  The limit
is in bytes by default; append a unit suffix such as K, M, G, or T for
kilo-, mega-, giga-, or terabytes (e.g. 10M).  The unit size follows the
B<-H> setting (base 2 unless B<-H> is given).

=item B<-H>

Display sizes using base 10 sizes (default is base 2).

=item B<-n>

Use simple fixed width formatting (useful for spreadsheets or other
parsing).

=item B<-D <dir>>

Use specified directory as time machine backup directory. By default,
tmutil will be used to determine this from the system settings.

Note: provide the full path, to the root of the backups. E.g.:
/Volumes/TM/Backups.backupdb/hostname

=item B<timestamp:>

The backup directory for which you'd like to see the changed contents.
The timestamp may be given with or without dashes.  Defaults to the most
recent backup (the one linked to by "Latest" in the backup directory).

=back

=head2 Acknowledgements

=over

=item Nathan Fielder:
 
Packaging and google code repo.

=item Lanny Rosicky:

Use of tmutil to locate the time machine backup directory.

=item Hayo Baan:

Code rewrite & bug fixes.
Additional help (-? or -h), verbose (-v), sort (-S), display (-H), and backup directory (-D) options.
Documentation and help (perlpod).

=back

=cut

################################################################################

use warnings;                   # Enable warnings
use strict;                     # Enable strict

use Pod::Usage;
use Fcntl ':mode';
use Getopt::Std;

use Cwd;
use File::Find;
no warnings "File::Find"; # Do not report File::Find errors (e.g., opendir errors due to lack of directory permissions)

# Usage and Help

# Print the short usage text (pod2usage with -verbose => 0) and exit with
# status 1; used when the command line is invalid.
sub usage {
    pod2usage(-verbose => 0, -exitstatus => 1);
}

# Print the full manual (pod2usage with -verbose => 2) and exit with status 0.
sub help {
    pod2usage(-verbose => 2, -exitstatus => 0);
}

# Route Getopt::Std's --help / --version handling to help() as well
*HELP_MESSAGE = *VERSION_MESSAGE = \&help;
$Getopt::Std::STANDARD_HELP_VERSION = 1;

################################################################################
# Option processing and Initialisation #########################################
################################################################################

# Process the options.  getopts() stores each switch in the package variable
# $opt_<letter>, which is why these are declared with `our'.
our ($opt_h, $opt_t, $opt_v, $opt_d, $opt_l, $opt_s, $opt_S, $opt_m, $opt_H, $opt_n, $opt_D);
getopts('?htvd:lsS:m:HnD:') || usage();
{
  # `opt_?' is not a valid identifier, so it has to be reached symbolically
  no strict 'refs';
  help() if ${'opt_?'} || $opt_h;
}

# Interpret depth option: only a non-negative integer is accepted
usage() if defined $opt_d && $opt_d !~ /^[0-9]+$/;

# Interpret sort option
$opt_S //= 2;               # default sort by name
$opt_S = 1 if $opt_s;       # sort by current size if -s given
# Validate unconditionally so that an empty argument is rejected too
# ("0" is a valid choice and passes both tests)
usage() if $opt_S !~ /^[0-9]+$/ || $opt_S > 2;

# Interpret min size option
my $base = ($opt_H?1000.0:1024.0);
if ($opt_m) {
    # Multiplier for each unit suffix, in units of $base
    my %bases=('K' => $base, 'M' => $base**2, 'G'=>$base**3, 'T'=>$base**4, 'P'=>$base**5);
    # Require a well-formed decimal number (e.g. 10, 1.5, .5), optionally
    # followed by a single unit letter; anything else is a usage error
    usage() if $opt_m !~ /\A([0-9]+\.?[0-9]*|\.[0-9]+)([KMGTP]?)\z/i;
    my ($number, $unit) = ($1, uc $2);
    $opt_m = $number;
    $opt_m *= $bases{$unit} if $unit;
}

# Locate time machine backups: -D wins, otherwise ask tmutil
my $tmpath = $opt_D;
if (!$tmpath) {
    # Backticks yield undef when tmutil cannot be run; fall back to '' so
    # that chomp does not warn and the check below reports the problem
    $tmpath = `tmutil machinedirectory` // '';
    chomp($tmpath);
}
die "No TimeMachine backups found" if !($tmpath && -d $tmpath);

# Get all available backups (directory entries named YYYY-MM-DD-HHMMSS).
# The directory is read directly so that special characters in $tmpath
# (spaces, quotes, parentheses, brackets, ...) can neither confuse a glob
# pattern nor a regex; the entries are bare names, so no prefix has to be
# stripped afterwards.
my @backups;
if (opendir(my $tmdir, $tmpath)) {
    @backups = sort grep { /^20[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{6}$/ } readdir($tmdir);
    closedir($tmdir);
}
die "None or only one Time Machine backups found" if @backups <= 1;

# With -t, just list all available backups and exit
if ($opt_t) {
  print join("\n",@backups),"\n";
  exit;
}

# Determine backups to compare: $latest is the backup to report on, $last
# the one immediately preceding it
my ($last, $latest);
if (@ARGV) {
    $latest = $ARGV[0];
    $latest =~ s|/$||;
    $latest =~ s/([0-9]{4})-?([0-9]{2})-?([0-9]{2})-?([0-9]{6})/$1-$2-$3-$4/; # So that timestamp can be given with and without dashes
    # Walk the sorted list, remembering the predecessor, until the requested
    # backup is reached.  Track explicitly whether it was found: otherwise an
    # unknown timestamp would run off the end of the list and go unnoticed.
    my $found = 0;
    for (@backups) {
        if ($_ eq $latest) {
            $found = 1;
            last;
        }
        $last=$_;
    }
    # Also fails for the oldest backup, which has nothing to be compared to
    die "Invalid backup directory $latest specified" if !$found || !defined($last);
} else {
    ($last,$latest)=@backups[$#backups-1..$#backups];
}

print "==> Comparing TM backup $latest to $last\n" unless $opt_n;
$last = "$tmpath/$last";
$latest = "$tmpath/$latest";

# Verbose mode echoes the effective settings (never in plain -n output)
if ($opt_v && !$opt_n) {
    print "  Depth: $opt_d directories\n" if defined $opt_d;
    print "  Omitting if smaller than: ", bytes($opt_m), "\n" if ($opt_m);
}

################################################################################
# Find all files/directories in latest backup and compare to previous ##########
################################################################################

my ($rsize,$rsize_old,$rcnt); # Counters/sizes for files/dirs below max depth
my $total_size = 0; # Total size of changed files/dirs in latest backup
my $total_cnt = 0;  # Total number of changed files/dirs in latest backup
my @summary = (); # List of summary lines

# Prefix pattern for paths inside the latest backup.  \Q...\E makes the path
# match literally, so characters such as "(", "+" or "[" in the volume or
# host name cannot be misread as regex syntax.
my $latest_re = qr/^\Q$latest\E/;

# Walk the latest backup.  An entry whose inode equals that of its counterpart
# in the previous backup is a hard link, i.e. unchanged, and is skipped (for
# directories the whole subtree is pruned).  Everything else is counted and
# either listed individually or, with -d, accumulated into its ancestor
# directory at the requested depth.
find(
    {
        wanted =>
            sub {
                # With no_chdir, $_ holds the full path of the current entry
                (my $old=$_) =~ s/$latest_re/$last/;   # counterpart in previous backup
                (my $name=$_) =~ s/$latest_re//;       # path relative to the backup root
                my (undef, $ino, $mode, undef, undef, undef, undef, $size) = lstat($_);
                # Silently ignore lstat errors (e.g., due to permission problems)
                $mode //= 0;
                $size //= 0;
                my ($ino_old, $size_old);
                if ($ino) {
                    # lstat returns an empty list if $old does not exist, which
                    # leaves both variables undefined.  No separate -e test:
                    # -e follows symbolic links and would report a dangling
                    # link in the previous backup as missing.
                    (undef, $ino_old, undef, undef, undef, undef, undef, $size_old) = lstat($old);
                    if ($ino_old && $ino == $ino_old) {
                        $File::Find::prune=1 if S_ISDIR($mode); # Prune matching dirs
                        return;
                    }
                }

                # Add found size to total size
                $total_size += $size;

                # With -l, skip symbolic links
                my $link = S_ISLNK($mode);
                return if $opt_l && $link;

                # Increase count
                $total_cnt++;

                if (defined $opt_d) {
                    # Depth = number of slashes in the relative path
                    my $depth = $name=~tr|/||;
                    $rcnt ||= 0;
                    if ($depth > $opt_d || (S_ISDIR($mode) && $depth == $opt_d)) {
                        # Count files in directory
                        $rsize += $size;
                        $rsize_old += $size_old if defined $size_old; # Silently ignore lstat errors (e.g., due to permission problems)
                        $rcnt++;
                        return # Post will handle summarizing the directory
                    }
                }
                # Add type indicator to name
                $name .= "/" if S_ISDIR($mode);
                $name .= "@" if $link;
                summarize($size,$size_old,$name);
            },
        preprocess =>
            (!defined $opt_d) ? 0:
            sub {
                (my $name=$File::Find::dir) =~ s/$latest_re//;
                my $depth = $name =~ tr|/||;
                # NOTE(review): -d tests $_; what File::Find leaves in $_ when
                # preprocess runs is not evident from this file -- confirm.
                if ($depth <= $opt_d && !(-d && $depth == $opt_d)) {
                    # Starting a new printable directory level; zero out recursive sizes
                    $rsize=$rsize_old=$rcnt=undef;
                }
                # Hand the directory listing back unfiltered
                @_;
            },
        postprocess =>
            (!defined $opt_d) ? 0:
            sub {
                (my $name=$File::Find::dir) =~ s/$latest_re//;
                my $depth = $name =~ tr|/||;
                if ($depth == $opt_d) {
                    # This directory is at the given depth, summarize it
                    summarize($rsize,$rsize_old,"$name/",$rcnt) if $rsize || $rsize_old;
                    $rsize = $rsize_old = $rcnt = undef;
                }
            },
        no_chdir => 1,
    },
    $latest);

################################################################################
# Display the results ##########################################################
################################################################################

# Layout settings: column width, old/new divider and the brackets around the
# per-directory count.  Plain (-n) output uses wider columns and no decoration.
my ($width, $sep, $lbr, $rbr) = (11, " -> ", "[", "]");
($width, $sep, $lbr, $rbr) = (14, " ", "", "") if $opt_n;

# Header
my $head_col = "%${width}.${width}s";
printf($head_col . "%s" . $head_col, "Old Size" , $sep, "New Size");
printf(" %9s", "# in Dir") if defined $opt_d;
printf(" File/Directory\n");
unless ($opt_n) {
    my $rule = "=" x $width;
    print($rule . $sep . $rule);
    print(" " . "=" x 9) if defined $opt_d;
    print(" " . "=" x 20 . "\n");
}

# Ordering of the summary lines: by the size column selected with -S/-s
# (0 = old size, 1 = new size; a missing size sorts as -1), otherwise and as
# tie-breaker by name
my $by_choice = sub {
    my $order = 0;
    if ($opt_S != 2) {
        $order = ($a->[$opt_S] // -1) <=> ($b->[$opt_S] // -1);
    }
    return $order || $a->[2] cmp $b->[2];
};

# List sorted summary lines
for my $row (sort $by_choice @summary) {
    my ($old_size, $new_size, $label, $count) = @$row;
    printf("%${width}s%s%${width}s", bytes($old_size), $sep, bytes($new_size));
    if (defined $opt_d) {
        # The displayed count is one less than the number accumulated
        my $in_dir = ($count && $count > 1) ? $lbr . ($count - 1) . $rbr : "";
        printf(" %9s", $in_dir);
    }
    print " $label\n";
}

# Total
unless ($opt_n) {
    printf "==> Total Backup: $total_cnt changed files/directories, %s\n", bytes($total_size,".2");
}

exit 0;


################################################################################
# Helper functions #############################################################
################################################################################

# Function to format number of bytes
#
# Arguments:
#   $bytes  - size in bytes; may be undef (size unknown)
#   $format - optional sprintf precision used for scaled values (default ".1")
#
# Returns "..." for an undefined size, except in plain mode (-n) where undef
# counts as 0.  Otherwise returns the size as text: in plain mode the bare
# integer byte count; else the value scaled by $base with a unit suffix
# ("B", "KiB", "MiB", ... or "KB", "MB", ... with -H).
sub bytes {
    my $bytes=shift;
    my $format=shift || ".1";

    $bytes //= 0 if ($opt_n);
    return "..." if !defined $bytes;

    my @suffix = ("", "K", "M", "G", "T", "P");
    my $suf = 0;

    # Scale down until the value fits the unit, but never step past the last
    # suffix: $suf must stay a valid index into @suffix, so anything beyond
    # the largest unit is simply shown as a big number in that unit
    while (!$opt_n && $bytes >= $base && $suf < $#suffix) {
        ++$suf;
        $bytes /= $base;
    }

    my $suffix = $suffix[$suf];
    $suffix .= "i" if $suf && !$opt_H;   # binary units: KiB, MiB, ...
    $suffix .= "B" if !$opt_n;
    # Unscaled values are whole bytes and are printed without decimals
    return ($suf ? sprintf("%${format}f", $bytes) : sprintf("%d", $bytes)) . $suffix;
}

# Function to add the summary of a file/directory
#
# Arguments: new size, old size (may be undef), display name and, for
# directories summarized with -d, the number of entries counted.
# Appends [old size, new size, name, count] to @summary, unless a minimum
# size (-m) is set and the new size is below it.
sub summarize {
    my ($size, $size_old, $name, $cnt) = @_;

    my $below_limit = $opt_m && $size < $opt_m;
    return if $below_limit;

    my $entry = [$size_old, $size, $name, $cnt];
    push @summary, $entry;
}
